from pymongo import MongoClient
import requests
from lxml import etree

# HTTP headers passed to requests.get() by getoldhoomslist(). The only entry
# is a desktop Edge/Chrome user-agent string.
# NOTE(review): presumably needed so the site serves the normal HTML page
# instead of rejecting the default client identity; confirm.
head = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26"
    ),
}

def _first_text(node, path):
    """Return the first XPath match under *node*, or "" when nothing matches."""
    hits = node.xpath(path)
    return hits[0] if hits else ""


def _compact(text):
    """Return *text* as a str with every space and newline removed."""
    return str(text).replace(" ", "").replace("\n", "")


def getoldhoomslist(url, timeout=10):
    """Download one listing page and extract a record for each listing card.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Optional; without a timeout the request could block indefinitely.

    Returns:
        A list of dicts, one per ``div`` matched by the card XPath. Each dict
        has the same thirteen keys. A field whose node is absent from a card
        is stored as "" instead of raising ``IndexError``.

    Raises:
        requests.exceptions.RequestException: if the download fails or the
            timeout elapses.
    """
    rep = requests.get(url, headers=head, timeout=timeout).text
    html = etree.HTML(rep)
    if html is None:
        # Nothing parseable came back (e.g. an empty body): no cards to read.
        return []

    cards = html.xpath(
        '/html/body/div[1]/div/div/section/section[3]/section[1]/section[2]/div'
    )
    tlist = []
    for web in cards:
        mydict = {
            "基本信息": _first_text(web, './a/div[2]/div[1]/div[1]/h3/text()'),
            # Split across several <span> nodes; glue them back together.
            "规格": "".join(web.xpath('./a/div[2]/div[1]/section/div[1]/p[1]/span/text()')),
            # These text nodes carry layout whitespace, so strip it out.
            "面积": _compact(_first_text(web, './a/div[2]/div[1]/section/div[1]/p[2]/text()')),
            "朝向": _first_text(web, './a/div[2]/div[1]/section/div[1]/p[3]/text()'),
            "楼层数": _compact(_first_text(web, './a/div[2]/div[1]/section/div[1]/p[4]/text()')),
            "建造年份": _compact(_first_text(web, './a/div[2]/div[1]/section/div[1]/p[5]/text()')),
            "所在地名": _first_text(web, './a/div[2]/div[1]/section/div[2]/p[1]/text()'),
            "地址": "-".join(web.xpath('./a/div[2]/div[1]/section/div[2]/p[2]/span/text()')),
            "负责人": _first_text(web, './a/div[2]/div[1]/div[2]/div/span[1]/text()'),
            "评分": _first_text(web, './a/div[2]/div[1]/div[2]/div/span[2]/text()'),
            "公司": _first_text(web, './a/div[2]/div[1]/div[2]/div/span[3]/text()'),
            "总价": "".join(web.xpath('./a/div[2]/div[2]/p[1]/span/text()')),
            "单价": _first_text(web, './a/div[2]/div[2]/p[2]/text()'),
        }
        tlist.append(mydict)
    return tlist

def cunchu(weilist):
    """Insert the given records into the MongoDB collection.

    Args:
        weilist: List of dicts to store. An empty (or falsy) value is
            skipped without opening a connection.

    Returns:
        None. A failed write is reported on stdout together with its cause
        rather than raised, so the caller can carry on with the next batch.
    """
    if not weilist:
        # insert_many() rejects an empty list; say so instead of reporting
        # a misleading write failure.
        print("没有可写入的数据！")
        return

    # NOTE(review): the username and password are hard-coded in this URI;
    # consider loading them from configuration or the environment.
    uri = "mongodb://lwjbigdata:123456@localhost:27017/?authMechanism=DEFAULT&authSource=admin"
    # The context manager closes the client (and its connections) on exit.
    with MongoClient(uri) as client:
        collection = client["spiderwork"]["安居客二手房"]
        try:
            print("正在写入数据！")
            collection.insert_many(weilist)
        except Exception as exc:
            # Still best-effort, but Ctrl-C / SystemExit now propagate and
            # the reason for the failure is shown.
            print("数据写入失败！", exc)

# Crawl listing pages 1 to 3: fetch each page, extract its records, store them.
for i in range(3):
    page_no = i + 1  # the site's page numbers are 1-based
    url = f"https://yongzhou.anjuke.com/sale/p{page_no}/?from=HomePage_TopBar"
    weilist = getoldhoomslist(url)
    cunchu(weilist)
